# if (!require("renv")) install.packages("renv")
# library(renv)
# renv::restore()
library(here)
here() starts at C:/Users/Marcony1/OneDrive - Fundacion Universidad de las Americas Puebla/Documents/MDS/Block 6/DSCI 532/DSCI_532_individual-assignment_marcony1
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(readr)
library(arrow)
Warning: package ‘arrow’ was built under R version 4.3.3
Attaching package: ‘arrow’
The following object is masked from ‘package:utils’:
timestamp
# Extract the census table and its data dictionary from the raw archive ----
# Only the two CSV members needed below are unpacked, into a scratch
# directory inside the project.
zip_file <- here("data", "raw", "iter_00_cpv2020_csv.zip")
if (!file.exists(zip_file)) {
  # Fail here with a clear message instead of later inside read_csv().
  stop("Raw archive not found: ", zip_file, call. = FALSE)
}

temp_dir <- here("temp")
dir.create(temp_dir, showWarnings = FALSE)

# Archive-relative member paths, declared once so the extraction request and
# the on-disk locations cannot drift apart. file.path() joins with "/".
data_member <- file.path(
  "iter_00_cpv2020",
  "conjunto_de_datos",
  "conjunto_de_datos_iter_00CSV20.csv"
)
dict_member <- file.path(
  "iter_00_cpv2020",
  "diccionario_datos",
  "diccionario_datos_iter_00CSV20.csv"
)

unzip(zip_file, files = c(data_member, dict_member), exdir = temp_dir)

# temp_dir already comes from here(), so the extracted files are located with
# file.path() rather than by wrapping the path in a second here() call.
data_path <- file.path(temp_dir, data_member)
dict_path <- file.path(temp_dir, dict_member)

# The dictionary is read with readr's defaults; its columns come back with
# placeholder names (...1, ...2, ...) that are cleaned up further down.
info_dict <- read_csv(dict_path)
New names:Rows: 290 Columns: 10── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): ...1, ...2, ...3, ...4, ...5, ...6
lgl (4): ...7, ...8, ...9, ...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the full census table, letting readr guess the column types.
# NOTE(review): the column specification printed after this call reports 283
# of the 286 columns as character, including columns whose sampled values look
# numeric. Presumably some cells hold non-numeric markers; confirm before
# relying on these columns for arithmetic.
df <- data_path |>
  read_csv()
Rows: 195662 Columns: 286── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (283): ENTIDAD, NOM_ENT, MUN, NOM_MUN, LOC, NOM_LOC, LONGITUD, LATITUD, ALTITUD, POBFEM, POBMAS, P_0A2, P_0A2_F, P_0A2_M, P_3YMAS, P_3YMAS_F, P_3Y...
dbl (3): POBTOT, VIVTOT, TVIVHAB
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Both CSVs are loaded, so the scratch extraction directory can be removed.
unlink(temp_dir, recursive = TRUE)

# Keep an untouched copy of the data dictionary next to the raw data.
info_dict |>
  write_csv(file = here("data", "raw", "diccionario_datos_iter_00CSV20.csv"))

# Quick look at the first rows of each table and at the structure of the
# census table.
head(df)
head(info_dict)
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
# Discard the first three rows of the dictionary; the row that follows them
# holds the column headers.
clean_info_dict <- info_dict[-seq_len(3), ]

# Promote that row to column names. `names<-` on a tibble requires a character
# vector without missing values, so the one-row tibble is flattened first and
# any blank header cell keeps the placeholder name the column already has.
header <- as.character(unlist(clean_info_dict[1, ], use.names = FALSE))
blank <- is.na(header) | !nzchar(header)
header[blank] <- names(clean_info_dict)[blank]
names(clean_info_dict) <- header
Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.Warning: The `value` argument of `names<-` must be a character vector as of tibble 3.0.0.
# Remove the first row (already used as the column names) together with
# columns 7 to 10, then display the cleaned dictionary.
clean_info_dict <- clean_info_dict[-1, -(7:10)]
clean_info_dict
# Lookup table: entity name as it appears in NOM_ENT -> name to use instead.
replacement_dict <- c(
  "Coahuila de Zaragoza" = "Coahuila",
  "Michoacán de Ocampo" = "Michoacán",
  "Veracruz de Ignacio de la Llave" = "Veracruz",
  "México" = "Estado de México"
)

# Look every NOM_ENT value up in the table; values without an entry come back
# as NA from the lookup and therefore fall through to the original name.
df_new_names <- mutate(
  df,
  NOM_ENT = coalesce(unname(replacement_dict[NOM_ENT]), NOM_ENT)
)

# One row per distinct entity name, saved to the processed-data folder.
unique_states <- distinct(df_new_names, NOM_ENT)
write_csv(unique_states, file = here("data", "processed", "entity_names.csv"))
unique_states

# Read the saved file back and keep its single column as a plain vector.
entities_csv <- pull(read_csv(here("data", "processed", "entity_names.csv")))
Rows: 33 Columns: 1── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): NOM_ENT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the entity names that were read back from entity_names.csv.
print(entities_csv)
[1] "Total nacional" "Aguascalientes" "Baja California" "Baja California Sur" "Campeche" "Coahuila"
[7] "Colima" "Chiapas" "Chihuahua" "Ciudad de México" "Durango" "Guanajuato"
[13] "Guerrero" "Hidalgo" "Jalisco" "Estado de México" "Michoacán" "Morelos"
[19] "Nayarit" "Nuevo León" "Oaxaca" "Puebla" "Querétaro" "Quintana Roo"
[25] "San Luis Potosí" "Sinaloa" "Sonora" "Tabasco" "Tamaulipas" "Tlaxcala"
[31] "Veracruz" "Yucatán" "Zacatecas"
# Positions (within the cleaned dictionary) of the variables to keep.
rows_to_include <- c(2, 4, 6:8, 10, 53:117)

# Take those dictionary rows and extract the fourth column, which holds the
# column names used in the census table (see the printed vector).
filtered_data <- pull(slice(clean_info_dict, rows_to_include), 4)
filtered_data
[1] "NOM_ENT" "NOM_MUN" "NOM_LOC" "LONGITUD" "LATITUD" "POBTOT" "REL_H_M" "POB0_14" "POB15_64" "POB65_MAS" "P_0A4"
[12] "P_0A4_F" "P_0A4_M" "P_5A9" "P_5A9_F" "P_5A9_M" "P_10A14" "P_10A14_F" "P_10A14_M" "P_15A19" "P_15A19_F" "P_15A19_M"
[23] "P_20A24" "P_20A24_F" "P_20A24_M" "P_25A29" "P_25A29_F" "P_25A29_M" "P_30A34" "P_30A34_F" "P_30A34_M" "P_35A39" "P_35A39_F"
[34] "P_35A39_M" "P_40A44" "P_40A44_F" "P_40A44_M" "P_45A49" "P_45A49_F" "P_45A49_M" "P_50A54" "P_50A54_F" "P_50A54_M" "P_55A59"
[45] "P_55A59_F" "P_55A59_M" "P_60A64" "P_60A64_F" "P_60A64_M" "P_65A69" "P_65A69_F" "P_65A69_M" "P_70A74" "P_70A74_F" "P_70A74_M"
[56] "P_75A79" "P_75A79_F" "P_75A79_M" "P_80A84" "P_80A84_F" "P_80A84_M" "P_85YMAS" "P_85YMAS_F" "P_85YMAS_M" "PROM_HNV" "PNACENT"
[67] "PNACENT_F" "PNACENT_M" "PNACOE" "PNACOE_F" "PNACOE_M"
# Keep only the columns listed in `filtered_data`. The character vector is
# wrapped in all_of() because passing a bare external vector to select() is
# deprecated; all_of() also errors if any listed column is missing from the
# data instead of silently ignoring it.
selected_df <- df_new_names |>
  select(all_of(filtered_data))
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(filtered_data)
# Now:
data %>% select(all_of(filtered_data))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Display the reduced table.
selected_df
# NOTE(review): this prints the structure of the full `df` again, exactly as
# the earlier str(df) call did; possibly str(selected_df) was intended here.
# Confirm before changing.
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
# Write the wrangled data to disk as a parquet dataset partitioned by
# NOM_ENT; files already present in the output directory are overwritten.
output_dir <- here("data", "processed", "parquet_data")
table <- arrow::Table$create(selected_df)
arrow::write_dataset(
  table,
  output_dir,
  existing_data_behavior = "overwrite",
  partitioning = c("NOM_ENT")
)
# Read the partitioned parquet dataset back: first in full, then one
# filtered subset per entity of interest. Each result is printed so it
# can be inspected.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

ds <- collect(parquet_ds)
ds

ds_puebla <- parquet_ds |>
  filter(NOM_ENT == "Puebla") |>
  collect()
ds_puebla

ds_yucatan <- parquet_ds |>
  filter(NOM_ENT == "Yucatán") |>
  collect()
ds_yucatan

ds_nuevo_leon <- parquet_ds |>
  filter(NOM_ENT == "Nuevo León") |>
  collect()
ds_nuevo_leon

ds_nacional <- parquet_ds |>
  filter(NOM_ENT == "Total nacional") |>
  collect()
ds_nacional
# Confirm that every entity in entities_csv can be read back from the
# parquet dataset, printing its row count or flagging an empty result.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))
for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()
  n_rows <- nrow(read_dfs)
  if (n_rows > 0) {
    print(paste("OK", value, n_rows))
  } else {
    print(paste("Dataset is empty", value))
  }
}
[1] "OK Total nacional 3"
[1] "OK Aguascalientes 2058"
[1] "OK Baja California 5566"
[1] "OK Baja California Sur 2561"
[1] "OK Campeche 2800"
[1] "OK Coahuila 4149"
[1] "OK Colima 1259"
[1] "OK Chiapas 21487"
[1] "OK Chihuahua 12389"
[1] "OK Ciudad de México 666"
[1] "OK Durango 6006"
[1] "OK Guanajuato 8945"
[1] "OK Guerrero 7001"
[1] "OK Hidalgo 4916"
[1] "OK Jalisco 10715"
[1] "OK Estado de México 5136"
[1] "OK Michoacán 8956"
[1] "OK Morelos 1678"
[1] "OK Nayarit 2913"
[1] "OK Nuevo León 4974"
[1] "OK Oaxaca 11856"
[1] "OK Puebla 7059"
[1] "OK Querétaro 2249"
[1] "OK Quintana Roo 2243"
[1] "OK San Luis Potosí 6729"
[1] "OK Sinaloa 5552"
[1] "OK Sonora 7500"
[1] "OK Tabasco 2517"
[1] "OK Tamaulipas 6695"
[1] "OK Tlaxcala 1323"
[1] "OK Veracruz 20401"
[1] "OK Yucatán 2691"
[1] "OK Zacatecas 4669"
# Pull out the raw coordinate columns and keep one sample longitude
# (the 8th entry) to prototype the conversion on.
latitudes <- selected_df[["LATITUD"]]
longitudes <- selected_df[["LONGITUD"]]
test_long <- longitudes[8]
test_long
[1] "102°17'45.768\" W"
# Prototype the degrees-minutes-seconds -> decimal-degrees conversion on
# the sample longitude. Splitting on the degree sign, quote characters and
# space leaves degrees, minutes and seconds in the first three pieces.
sections <- strsplit(test_long, "[°'\" ]")[[1]]
degrees <- as.numeric(sections[1])
minutes <- as.numeric(sections[2])
seconds <- as.numeric(sections[3])
# The result is negated unconditionally; the sample value carries a "W"
# suffix, for which a negative longitude is the expected sign.
decimal_degrees <- -(degrees + minutes / 60 + seconds / 3600)
decimal_degrees
[1] -102.296
# Convert one degrees-minutes-seconds string (e.g. "102°17'45.768\" W")
# to signed decimal degrees. Private helper shared by
# longitude_to_decimal() and latitude_to_decimal().
#
# coord:            a single DMS string, or NA.
# negative_letters: hemisphere letters that make the result negative.
# positive_letters: hemisphere letters that make the result positive.
# default_sign:     sign (-1 or 1) applied when the string carries no
#                   recognised hemisphere letter.
#
# Returns a length-one double; NA_real_ when coord is NA.
.dms_to_decimal <- function(coord, negative_letters, positive_letters,
                            default_sign) {
  if (is.na(coord)) {
    return(NA_real_)
  }
  sections <- unlist(strsplit(coord, "[°'\" ]"))
  # Adjacent separators (the closing quote followed by a space) produce
  # empty pieces; drop them so the hemisphere letter sits in position 4.
  sections <- sections[nzchar(sections)]
  magnitude <- as.numeric(sections[1]) +
    as.numeric(sections[2]) / 60 +
    as.numeric(sections[3]) / 3600
  hemisphere <- toupper(sections[4])
  direction <- default_sign
  if (!is.na(hemisphere)) {
    if (hemisphere %in% negative_letters) {
      direction <- -1
    } else if (hemisphere %in% positive_letters) {
      direction <- 1
    }
  }
  magnitude * direction
}

# Convert DMS longitude strings to decimal degrees.
#
# test_long: character vector of DMS longitudes (NA allowed); a single
#            string works as before.
#
# Returns an unnamed double vector of the same length. "W" values are
# negative and "E" values positive; when no hemisphere letter is present
# the value is treated as western (negative), matching the previous
# behaviour of this function.
longitude_to_decimal <- function(test_long) {
  vapply(
    as.character(test_long),
    .dms_to_decimal,
    numeric(1),
    negative_letters = "W",
    positive_letters = "E",
    default_sign = -1,
    USE.NAMES = FALSE
  )
}

# Convert DMS latitude strings to decimal degrees.
#
# test_lat: character vector of DMS latitudes (NA allowed); a single
#           string works as before.
#
# Returns an unnamed double vector of the same length. "N" values are
# positive and "S" values negative; when no hemisphere letter is present
# the value is treated as northern (positive), matching the previous
# behaviour of this function.
latitude_to_decimal <- function(test_lat) {
  vapply(
    as.character(test_lat),
    .dms_to_decimal,
    numeric(1),
    negative_letters = "S",
    positive_letters = "N",
    default_sign = 1,
    USE.NAMES = FALSE
  )
}
# Add decimal-degree versions of the DMS coordinate columns.
# vapply() pins each new column to an unnamed double vector. sapply()
# names every element after its input string and can return a different
# type (e.g. a list for empty input).
selected_clean <- selected_df |>
  mutate(
    longitude_decimal = vapply(
      LONGITUD, longitude_to_decimal, numeric(1),
      USE.NAMES = FALSE
    ),
    latitude_decimal = vapply(
      LATITUD, latitude_to_decimal, numeric(1),
      USE.NAMES = FALSE
    )
  )
selected_clean
# Write the table with decimal coordinates to its own parquet dataset,
# partitioned by NOM_ENT; existing files in that directory are overwritten.
output_dir <- here("data", "processed", "parquet_data_coords")
table <- arrow::Table$create(selected_clean)
arrow::write_dataset(
  table,
  output_dir,
  existing_data_behavior = "overwrite",
  partitioning = c("NOM_ENT")
)
# Read data/processed/data_coords.csv back in for inspection.
prueba_csv <- here("data", "processed", "data_coords.csv") |>
  read_csv()
Rows: 195662 Columns: 73── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (70): NOM_ENT, NOM_MUN, NOM_LOC, LONGITUD, LATITUD, REL_H_M, POB0_14, POB15_64, POB65_MAS, P_0A4, P_0A4_F, P_0A4_M, P_5A9, P_5A9_F, P_5A9_M, P_10A...
dbl (3): POBTOT, longitude_decimal, latitude_decimal
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the data read from the CSV for a visual check.
prueba_csv
# Test whether NOM_LOC alone identifies each row within an entity by
# comparing the row count with the number of distinct NOM_LOC values.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))
for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()
  n_rows <- nrow(read_dfs)
  n_names <- length(unique(read_dfs$NOM_LOC))
  if (n_rows != n_names) {
    print(paste("NOT OK", value, n_rows, "<>", n_names))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1772"
[1] "NOT OK Baja California 5566 <> 4621"
[1] "NOT OK Baja California Sur 2561 <> 1832"
[1] "NOT OK Campeche 2800 <> 1894"
[1] "NOT OK Coahuila 4149 <> 3287"
[1] "NOT OK Colima 1259 <> 1035"
[1] "NOT OK Chiapas 21487 <> 10349"
[1] "NOT OK Chihuahua 12389 <> 8082"
[1] "NOT OK Ciudad de México 666 <> 617"
[1] "NOT OK Durango 6006 <> 4444"
[1] "NOT OK Guanajuato 8945 <> 6923"
[1] "NOT OK Guerrero 7001 <> 5189"
[1] "NOT OK Hidalgo 4916 <> 3690"
[1] "NOT OK Jalisco 10715 <> 6764"
[1] "NOT OK Estado de México 5136 <> 4291"
[1] "NOT OK Michoacán 8956 <> 6065"
[1] "NOT OK Morelos 1678 <> 1471"
[1] "NOT OK Nayarit 2913 <> 2243"
[1] "NOT OK Nuevo León 4974 <> 3328"
[1] "NOT OK Oaxaca 11856 <> 7924"
[1] "NOT OK Puebla 7059 <> 5037"
[1] "NOT OK Querétaro 2249 <> 1885"
[1] "NOT OK Quintana Roo 2243 <> 1832"
[1] "NOT OK San Luis Potosí 6729 <> 5037"
[1] "NOT OK Sinaloa 5552 <> 4064"
[1] "NOT OK Sonora 7500 <> 5710"
[1] "NOT OK Tabasco 2517 <> 2019"
[1] "NOT OK Tamaulipas 6695 <> 4601"
[1] "NOT OK Tlaxcala 1323 <> 1075"
[1] "NOT OK Veracruz 20401 <> 12141"
[1] "NOT OK Yucatán 2691 <> 1790"
[1] "NOT OK Zacatecas 4669 <> 3594"
# Test whether NOM_MUN combined with NOM_LOC identifies each row within
# an entity. The combined key is stored in a NOM_MUN_LOC column and its
# distinct count is compared with the row count.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))
for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()
  read_dfs$NOM_MUN_LOC <- paste(
    read_dfs$NOM_MUN,
    read_dfs$NOM_LOC,
    sep = "_"
  )
  n_rows <- nrow(read_dfs)
  n_keys <- length(unique(read_dfs$NOM_MUN_LOC))
  if (n_rows != n_keys) {
    print(paste("NOT OK", value, n_rows, "<>", n_keys))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1979"
[1] "NOT OK Baja California 5566 <> 5074"
[1] "NOT OK Baja California Sur 2561 <> 2212"
[1] "NOT OK Campeche 2800 <> 2368"
[1] "NOT OK Coahuila 4149 <> 4023"
[1] "NOT OK Colima 1259 <> 1177"
[1] "NOT OK Chiapas 21487 <> 18268"
[1] "NOT OK Chihuahua 12389 <> 11167"
[1] "NOT OK Ciudad de México 666 <> 663"
[1] "NOT OK Durango 6006 <> 5578"
[1] "NOT OK Guanajuato 8945 <> 8753"
[1] "NOT OK Guerrero 7001 <> 6870"
[1] "NOT OK Hidalgo 4916 <> 4870"
[1] "NOT OK Jalisco 10715 <> 10393"
[1] "NOT OK Estado de México 5136 <> 5108"
[1] "NOT OK Michoacán 8956 <> 8656"
[1] "NOT OK Morelos 1678 <> 1662"
[1] "NOT OK Nayarit 2913 <> 2726"
[1] "NOT OK Nuevo León 4974 <> 4641"
[1] "NOT OK Oaxaca 11856 <> 11760"
[1] "NOT OK Puebla 7059 <> 6837"
[1] "NOT OK Querétaro 2249 <> 2222"
[1] "NOT OK Quintana Roo 2243 <> 2138"
[1] "NOT OK San Luis Potosí 6729 <> 6590"
[1] "NOT OK Sinaloa 5552 <> 5141"
[1] "NOT OK Sonora 7500 <> 7141"
[1] "NOT OK Tabasco 2517 <> 2399"
[1] "NOT OK Tamaulipas 6695 <> 6265"
[1] "NOT OK Tlaxcala 1323 <> 1315"
[1] "NOT OK Veracruz 20401 <> 19225"
[1] "NOT OK Yucatán 2691 <> 2558"
[1] "NOT OK Zacatecas 4669 <> 4610"
# Test whether LOC combined with NOM_LOC identifies each row within an
# entity. The key columns are verified against the dataset first:
# `read_dfs$LOC` on a missing column only warns and yields NULL, so the
# pasted key would carry no LOC information and the comparison would
# silently fall back to testing NOM_LOC alone.
key_cols <- c("LOC", "NOM_LOC")
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))
missing_cols <- setdiff(key_cols, names(parquet_ds))
if (length(missing_cols) > 0) {
  print(paste(
    "SKIPPED LOC + NOM_LOC uniqueness check, missing column(s):",
    paste(missing_cols, collapse = ", ")
  ))
} else {
  for (value in entities_csv) {
    read_dfs <- parquet_ds |>
      filter(NOM_ENT == value) |>
      collect()
    read_dfs$NOM_LOC_LOC <- paste(read_dfs$LOC, read_dfs$NOM_LOC, sep = "_")
    n_rows <- nrow(read_dfs)
    n_keys <- length(unique(read_dfs$NOM_LOC_LOC))
    if (n_rows == n_keys) {
      print(paste("Localities Unique", value))
    } else {
      print(paste("NOT OK", value, n_rows, "<>", n_keys))
    }
  }
}
Warning: Unknown or uninitialised column: `LOC`.
[1] "Localities Unique Total nacional"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Aguascalientes 2058 <> 1772"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California 5566 <> 4621"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California Sur 2561 <> 1832"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Campeche 2800 <> 1894"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Coahuila 4149 <> 3287"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Colima 1259 <> 1035"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chiapas 21487 <> 10349"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chihuahua 12389 <> 8082"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Ciudad de México 666 <> 617"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Durango 6006 <> 4444"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guanajuato 8945 <> 6923"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guerrero 7001 <> 5189"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Hidalgo 4916 <> 3690"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Jalisco 10715 <> 6764"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Estado de México 5136 <> 4291"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Michoacán 8956 <> 6065"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Morelos 1678 <> 1471"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nayarit 2913 <> 2243"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nuevo León 4974 <> 3328"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Oaxaca 11856 <> 7924"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Puebla 7059 <> 5037"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Querétaro 2249 <> 1885"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Quintana Roo 2243 <> 1832"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK San Luis Potosí 6729 <> 5037"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sinaloa 5552 <> 4064"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sonora 7500 <> 5710"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tabasco 2517 <> 2019"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tamaulipas 6695 <> 4601"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tlaxcala 1323 <> 1075"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Veracruz 20401 <> 12141"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Yucatán 2691 <> 1790"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Zacatecas 4669 <> 3594"
{# {r} # for(value in entities_csv) { # # read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |> # filter(NOM_ENT==value) |> # collect() # # read_dfs$LOC_MUN <- paste(read_dfs$LOC, read_dfs$MUN, sep = "_") # # if (nrow(read_dfs) == length(unique(read_dfs$LOC_MUN))) { # print(paste("Localities Unique", value)) # } else { # print(paste("NOT OK", value, nrow(read_dfs), "<>", length(unique(read_dfs$LOC_MUN)))) # # } # # }
# Test whether LOC combined with NOM_MUN identifies each row within an
# entity. The key columns are verified against the dataset first:
# `read_dfs$LOC` on a missing column only warns and yields NULL, so the
# pasted key would carry no LOC information and the comparison would
# silently fall back to counting distinct NOM_MUN values.
key_cols <- c("LOC", "NOM_MUN")
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))
missing_cols <- setdiff(key_cols, names(parquet_ds))
if (length(missing_cols) > 0) {
  print(paste(
    "SKIPPED LOC + NOM_MUN uniqueness check, missing column(s):",
    paste(missing_cols, collapse = ", ")
  ))
} else {
  for (value in entities_csv) {
    read_dfs <- parquet_ds |>
      filter(NOM_ENT == value) |>
      collect()
    read_dfs$LOC_MUN <- paste(read_dfs$LOC, read_dfs$NOM_MUN, sep = "_")
    n_rows <- nrow(read_dfs)
    n_keys <- length(unique(read_dfs$LOC_MUN))
    if (n_rows == n_keys) {
      print(paste("Localities Unique", value))
    } else {
      print(paste("NOT OK", value, n_rows, "<>", n_keys))
    }
  }
}
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Total nacional 3 <> 1"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Aguascalientes 2058 <> 12"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California 5566 <> 7"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Baja California Sur 2561 <> 6"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Campeche 2800 <> 13"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Coahuila 4149 <> 39"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Colima 1259 <> 11"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chiapas 21487 <> 125"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Chihuahua 12389 <> 68"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Ciudad de México 666 <> 17"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Durango 6006 <> 40"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guanajuato 8945 <> 47"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Guerrero 7001 <> 82"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Hidalgo 4916 <> 85"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Jalisco 10715 <> 126"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Estado de México 5136 <> 126"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Michoacán 8956 <> 114"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Morelos 1678 <> 37"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nayarit 2913 <> 21"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Nuevo León 4974 <> 52"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Oaxaca 11856 <> 569"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Puebla 7059 <> 218"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Querétaro 2249 <> 19"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Quintana Roo 2243 <> 12"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK San Luis Potosí 6729 <> 59"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sinaloa 5552 <> 19"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Sonora 7500 <> 73"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tabasco 2517 <> 18"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tamaulipas 6695 <> 44"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Tlaxcala 1323 <> 61"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Veracruz 20401 <> 213"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Yucatán 2691 <> 107"
Warning: Unknown or uninitialised column: `LOC`.
[1] "NOT OK Zacatecas 4669 <> 59"